library(tidyverse)
library(here)
library(arrow)
library(sf)
library(urbnmapr)
library(naniar)
library(janitor)
library(ggiraph)
options(scipen = 99)Treatment Mapping
Load Libraries
Today’s Data
The data we will analyze today is SAMHSA’s TEDS-D Dataset. The metadata can be found here
Reading in feather files with arrow
teds_d <- read_parquet(here("data/teds_d_lecture.parquet"))Clean names
teds_d <- teds_d %>%
clean_names()Selecting for relevant columns for today’s class
State
Frequency of use at discharge
Treatment Service
Length of Stay
Reason for Discharge
teds_d_select <- teds_d %>%
select(freq1_d, stfips, services_d, los, reason)write_parquet(teds_d_select, here("data/teds_d_lecture.parquet"))teds_d_select <- read_parquet(here("data/teds_d_lecture.parquet"))NA Analysis
How does the documentation label missing data?
teds_d_select[teds_d_select == "-9"] <- NAmiss_var_summary(teds_d_select)# A tibble: 5 × 3
variable n_miss pct_miss
<chr> <int> <num>
1 freq1_d 7263891 51.8
2 services_d 4715728 33.6
3 reason 140 0.000997
4 los 18 0.000128
5 stfips 0 0
Variable Re-coding
Frequency of Use at Discharge
teds_d_select$freq1_d <- as.character(teds_d_select$freq1_d)
teds_d_select$freq1_d[teds_d_select$freq1_d == "1"] <- "no use"
teds_d_select$freq1_d[teds_d_select$freq1_d == "2"] <- "some use"
teds_d_select$freq1_d[teds_d_select$freq1_d == "3"] <- "daily use"
teds_d_select$freq1_d[is.na(teds_d_select$freq1_d)] <- "unknown"Services
teds_d_select$services_d <- as.character(teds_d_select$services_d)
teds_d_select$services_d[teds_d_select$services_d == "1"] <- "Detox, 24-hour, hospital inpatient"
teds_d_select$services_d[teds_d_select$services_d == "2"] <- "Detox, 24-hour, free-standing residential"
teds_d_select$services_d[teds_d_select$services_d == "3"] <- "Rehab/residential, hospital (non-detox)"
teds_d_select$services_d[teds_d_select$services_d == "4"] <- "Rehab/residential, short term (30 days or fewer)"
teds_d_select$services_d[teds_d_select$services_d == "5"] <- "Rehab/residential, long term (more than 30 days)"
teds_d_select$services_d[teds_d_select$services_d == "6"] <- "Ambulatory, intensive outpatient"
teds_d_select$services_d[teds_d_select$services_d == "7"] <- "Ambulatory, non-intensive outpatient"
teds_d_select$services_d[teds_d_select$services_d == "8"] <- "Ambulatory, detoxification"
teds_d_select$services_d[is.na(teds_d_select$services_d)] <- "unknown"Reason
teds_d_select$reason <- as.character(teds_d_select$reason)
teds_d_select$reason[teds_d_select$reason == "1"] <- "completed"
teds_d_select$reason[teds_d_select$reason == "2"] <- "dropped out"
teds_d_select$reason[teds_d_select$reason == "3"] <- "terminated by facility"
teds_d_select$reason[teds_d_select$reason == "4"] <- "transfered"
teds_d_select$reason[teds_d_select$reason == "5"] <- "incarcerated"
teds_d_select$reason[teds_d_select$reason == "6"] <- "death"
teds_d_select$reason[teds_d_select$reason == "7"] <- "other"Mapping
We want to map the percentage of complete treatments by state
First, let’s calculate the percentage of completed treatments by state
percent_completed_by_state <- teds_d_select %>%
group_by(stfips) %>%
summarize(
total_cases = n(),
completed_cases = sum(reason == "completed", na.rm = TRUE)
) %>%
mutate(percentage_completed = (completed_cases / total_cases) * 100)Next, let’s bring in some mapping data
states_map <- get_urbn_map(map = "states", sf = TRUE)What do we notice that’s different between the teds-d stfips column and the states_map stfips column?
percent_completed_by_state$stfips_recode <- sprintf('%02d', percent_completed_by_state$stfips)colnames(percent_completed_by_state)[colnames(percent_completed_by_state) == "stfips_recode"] <- "state_fips"Joining data
percent_completed_by_state_map <- full_join(percent_completed_by_state,
states_map,
by = "state_fips")old-style crs object detected; please recreate object with a recent sf::st_crs()
Plotting Map
ggplot(percent_completed_by_state_map) +
geom_sf(percent_completed_by_state_map,
mapping = aes(geometry = geometry, fill = percentage_completed),
color = "#ffffff", size = 0.25) +
labs(fill = "% of Completed Treatment Episodes") +
coord_sf(datum = NA)+
theme_minimal() 
Making interactive with ggiprah
interactive_completed_treatment_map <- ggplot(percent_completed_by_state_map) +
geom_sf_interactive(
mapping = aes(
geometry = geometry,
fill = percentage_completed,
tooltip = paste("State FIPS:", stfips, "State Name:", state_name, "<br>Completed:", percentage_completed, "%")
),
color = "#ffffff",
size = 0.25
) +
labs(fill = "% of Completed Treatment Episodes") +
coord_sf(datum = NA) +
theme_minimal()
# Use `girafe` to render the interactive plot
girafe(ggobj = interactive_completed_treatment_map)Round & Add state name to tooltip
Adding color bins
percent_completed_by_state_map <- percent_completed_by_state_map %>%
mutate(percentage_bin = cut(percentage_completed, breaks=c(0, 10,20,30,40,50, 60, 70, 80)))
ggplot(percent_completed_by_state_map) +
geom_sf(mapping = aes(geometry = geometry, fill = percentage_bin),
color = "#ffffff", size = 0.25) +
labs(fill = "% of CompletedTreatment Episodes",
title = "Completed Treatment Episodes by State",
subtitle = "TEDS-D Dataset (SAMHSA)") +
scale_fill_viridis_d(option = "D") +
coord_sf(datum = NA) +
theme_minimal() +
theme(
panel.background = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
legend.text = element_text(size = 4),
legend.title = element_text(size = 5),
strip.text = element_text(size = 4)
) 
Assignment
Make an interactive map with
ggiraphshowing the percentage of completed treatments that end with no use at discharge (freq1_d)–> no use at discharge / completed treatments
percent_completed_by_states_map <- full_join(percent_completed_by_state,
states_map,
by = "state_fips")old-style crs object detected; please recreate object with a recent sf::st_crs()
ggplot(percent_completed_by_state_map) +
geom_sf(percent_completed_by_state_map,
mapping = aes(geometry = geometry, fill = percentage_completed),
color = "#ffffff", size = 0.25) +
labs(fill = "% of Completed Treatment Episodes") +
coord_sf(datum = NA)+
theme_minimal() 
percent_no_use_discharge <- teds_d_select %>%
group_by(stfips) %>%
summarize(
total_cases = n(),
completed_cases = sum(reason == "completed", na.rm = TRUE),
completed_no_use_discharge = sum(freq1_d == "no use" & reason == "completed", na.rm = TRUE)
) %>%
mutate(percentage_no_use = (completed_no_use_discharge / completed_cases) * 100)percent_no_use_discharge$stfips_recode <- sprintf('%02d',percent_no_use_discharge$stfips)colnames(percent_no_use_discharge)[colnames(percent_no_use_discharge) == "stfips_recode"] <- "state_fips"percent_no_use_discharge_map <- full_join(percent_no_use_discharge,
states_map,
by = "state_fips")old-style crs object detected; please recreate object with a recent sf::st_crs()
interactive_no_use_discharge_map <- ggplot(percent_no_use_discharge_map) +
geom_sf_interactive(
mapping = aes(
geometry = geometry,
fill = percentage_no_use,
tooltip = paste("State FIPS:", stfips, "<br>Completed with no use:", round(percentage_no_use, 2), "%", "<br>State:", state_name)
),
color = "#ffffff",
size = 0.1
) +
labs(fill = "% of Completed Treatment Resulting in No Use",
title = "Completed Treatments that end with No Use at Discharge") +
coord_sf(datum = NA) +
theme_minimal() +
theme(
panel.background = element_blank(),
legend.text = element_text(size = 4),
legend.title = element_text(size = 5)
)
interactive_no_use_discharge_map
interactive_no_use_discharge_map <- ggplot(percent_completed_by_state_map) +
geom_sf_interactive(
mapping = aes(
geometry = geometry,
fill = percentage_completed,
tooltip = paste("State FIPS:", stfips, "State Name:", state_name, "<br>Completed:", percentage_completed, "%")
),
color = "#ffffff",
size = 0.25
) +
labs(fill = "% of Completed Treatment Resulting in No Use",
title = "Completed Treatments that end with No Use at Discharge") +
coord_sf(datum = NA) +
theme_minimal()
# Use `girafe` to render the interactive plot
girafe(ggobj = interactive_no_use_discharge_map)- How does the percentage of treatments being completed & percentage of treatments ending with no use vary by the service (completed_cases) and length of stay. (services_d) Create at least 3 visualizations to try and answer this question
- group_by service or LOS, etc.
summary(as.factor(teds_d_select$services_d)) Ambulatory, detoxification
81356
Ambulatory, intensive outpatient
1255393
Ambulatory, non-intensive outpatient
4564225
Detox, 24-hour, free-standing residential
1449619
Detox, 24-hour, hospital inpatient
242358
Rehab/residential, hospital (non-detox)
23570
Rehab/residential, long term (more than 30 days)
721832
Rehab/residential, short term (30 days or fewer)
981477
unknown
4715728
summary(as.factor(teds_d_select$los)) 1 2 3 4 5 6 7 8 9 10
1580286 599892 550056 535766 475859 319425 268916 195634 133531 121493
11 12 13 14 15 16 17 18 19 20
99963 95348 130989 180950 151021 96033 84998 76844 76950 100876
21 22 23 24 25 26 27 28 29 30
150730 118777 83618 77527 71309 72548 111857 217478 155176 125686
31 32 33 34 35 36 37 NA's
939137 722560 1213096 983155 1132884 1263605 721567 18
percent_completed_w_services <- teds_d_select %>%
group_by(stfips, services_d) %>%
summarize(
total_cases = n(),
completed_cases = sum(reason == "completed", na.rm = TRUE)
) %>%
mutate(percent_complete_by_service = (completed_cases / total_cases) * 100)`summarise()` has grouped output by 'stfips'. You can override using the
`.groups` argument.
nlevels(as.factor(percent_completed_w_services$stfips)) # checking to see that all stfips are represented[1] 52
percent_completed_w_services$state_fips <- sprintf('%02d', percent_completed_w_services$stfips)
percent_completed_w_services_map <- full_join(percent_completed_w_services,
states_map,
by = "state_fips")old-style crs object detected; please recreate object with a recent sf::st_crs()
ggplot(percent_completed_w_services_map, aes(x = state_abbv, y = completed_cases, fill = services_d)) +
geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplot(percent_completed_w_services_map, aes(x = state_abbv, y = completed_cases)) +
geom_point(stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplot(percent_completed_w_services_map, aes(x = state_abbv, y = completed_cases)) +
geom_histogram(stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))Warning in geom_histogram(stat = "identity"): Ignoring unknown parameters:
`binwidth`, `bins`, and `pad`

percent_completed_w_services <- teds_d_select %>%
group_by(services_d) %>%
summarize(
total_cases = n(),
completed_cases = sum(reason == "completed", na.rm = TRUE)
) %>%
mutate(percent_complete_by_service = (completed_cases / total_cases) * 100)
ggplot(percent_completed_w_services, aes(x = percent_complete_by_service, y = services_d, fill = services_d)) +
geom_bar(stat = "identity") 